1
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the KCLT weather CSV into a DataFrame and show its column names.
# NOTE(review): the path is an absolute location on one user's machine;
# adjust it before running elsewhere.
csv_path = "C:/Users/Chhavi Bhadana/Downloads/KCLT.csv"
df = pd.read_csv(csv_path)
df.columns
Index(['date', 'actual_mean_temp', 'actual_min_temp', 'actual_max_temp',
'average_min_temp', 'average_max_temp', 'record_min_temp',
'record_max_temp', 'record_min_temp_year', 'record_max_temp_year',
'actual_precipitation', 'average_precipitation',
'record_precipitation'],
dtype='object')
2.
# Overlaid bar chart of the daily max, mean and min temperatures, with the
# minimum and maximum of each series labelled at the date it occurred.
#
# Parse the dates before plotting: read_csv leaves 'date' as plain strings,
# and matplotlib would then treat every row as a separate category instead
# of a point on a time axis. pd.to_datetime is harmless if the column has
# already been converted.
x = pd.to_datetime(df['date'])
y1 = df['actual_max_temp']
y2 = df['actual_min_temp']
y3 = df['actual_mean_temp']

# Bars are drawn in max, mean, min order; later calls paint over earlier ones.
plt.bar(x, y1, color='pink', label='1 = max_temp')
plt.bar(x, y3, color='blue', label='3 = mean_temp')
plt.bar(x, y2, color='yellow', label='2 = min_temp')
plt.legend()

# Label suffixes match the legend numbering (1 = max, 2 = min, 3 = mean).
numbered_series = (('1', y1), ('2', y2), ('3', y3))
for suffix, series in numbered_series:
    plt.annotate('min' + suffix, xy=(x.loc[series.idxmin()], series.min()))
for suffix, series in numbered_series:
    plt.annotate('max' + suffix, xy=(x.loc[series.idxmax()], series.max()))
Text(2015-06-24 00:00:00, 88, 'max3')
3.
# Histogram of the daily mean temperature for August 2014.
# Convert 'date' to datetime in place so the .dt accessor is available.
df['date'] = pd.to_datetime(df['date'])
in_2014 = df['date'].dt.year == 2014
df1 = df[in_2014]
in_august = df1['date'].dt.month == 8
x1 = df1.loc[in_august, 'actual_mean_temp']
plt.hist(x1)
(array([ 2., 1., 4., 1., 2., 1., 10., 3., 4., 3.]), array([68. , 69.5, 71. , 72.5, 74. , 75.5, 77. , 78.5, 80. , 81.5, 83. ]), <BarContainer object of 10 artists>)
# Histogram of the daily mean temperature for March 2015.
# The .dt accessor requires 'date' to already be a datetime column.
in_2015 = df['date'].dt.year == 2015
df2 = df[in_2015]
in_march = df2['date'].dt.month == 3
x2 = df2.loc[in_march, 'actual_mean_temp']
plt.hist(x2)
(array([2., 2., 1., 2., 4., 8., 3., 6., 2., 1.]), array([35. , 38.6, 42.2, 45.8, 49.4, 53. , 56.6, 60.2, 63.8, 67.4, 71. ]), <BarContainer object of 10 artists>)
4.
# Side-by-side box plots of the actual mean, min and max temperature columns.
df.boxplot(
    column=[
        'actual_mean_temp',
        'actual_min_temp',
        'actual_max_temp',
    ]
)
<AxesSubplot:>
Observations: None of the three datasets has outliers. The max temperature recorded was 100. The min temperature recorded was approximately 5. The mean of actual_mean_temp is close to the third quartile of actual_min_temp.
5.
# Correlation between the actual mean and actual max temperature columns,
# using Series.corr with its default method.
col1, col2 = df['actual_mean_temp'], df['actual_max_temp']
corelation = col1.corr(other=col2)
print(corelation)
0.9731336995939666
The correlation between actual mean temp and actual max temp is strongly positive (about 0.97).
6.
# Scatter plot of actual mean temperature (x axis) against actual min
# temperature (y axis). col1 is the actual_mean_temp column assigned earlier.
a = df['actual_min_temp']
plt.scatter(x=col1, y=a, alpha=0.5, color='purple')
plt.xlabel(xlabel='actual mean temp')
plt.ylabel(ylabel='acctual min temp')
Text(0, 0.5, 'acctual min temp')
# Scatter plot of actual mean temperature (x axis) against actual max
# temperature (y axis). col1 and col2 are the columns assigned earlier.
plt.scatter(x=col1, y=col2, alpha=0.5, color='g')
plt.xlabel(xlabel='actual mean temp')
plt.ylabel(ylabel='acctual max temp')
Text(0, 0.5, 'acctual max temp')
7.
import seaborn as sn
# Pairwise correlation matrix of the numeric columns, drawn as a heatmap.
# Select numeric dtypes explicitly: from pandas 2.0 DataFrame.corr() no
# longer drops non-numeric columns (such as 'date') by default and raises
# instead. Older pandas dropped them silently, so the result is the same there.
corrMatrix = df.select_dtypes(include='number').corr()
sn.heatmap(corrMatrix)
<AxesSubplot:>
# Apply seaborn's "ticks" theme, then draw a pair plot of the DataFrame.
sn.set_theme(style='ticks')
sn.pairplot(data=df)
<seaborn.axisgrid.PairGrid at 0x1b7111771c8>